/*==============================================================================
* This .do file imports labor market data downloaded from Eurostat
==============================================================================*/

* Start from an empty session and turn off -more- pauses so the file runs
* without interruption.
clear all
set more off

* Every CSV read below and every intermediate .dta written below is addressed
* relative to this folder; the global macro insheet_files must already be set.
cd "$insheet_files/Eurostat Web Data"

*-------------------------------------------------------------------------------
* 1. Convert csv into dta
*-------------------------------------------------------------------------------

********************************* Population ***********************************

* Population: read the raw extract, keep the all-ages / both-sexes rows and
* store one value per region-year in POP.dta.
* The variable is referred to by its full name POP_web throughout so the code
* does not depend on Stata's variable-abbreviation setting.
insheet using demo_r_pjangroup_1_Data.csv, clear

* Recode ":" entries to "." and convert to numeric, stripping commas.
replace value="." if value==":"
destring value, generate(POP_web) ignore(",") force

rename time year
rename geo nuts
rename geo_label region_name

keep if age=="Total" & sex=="Total"

collapse (sum) POP_web (first) region_name, by(nuts year)

* Rescale by 1,000.
replace POP_web=POP_web/1000
* collapse (sum) returns 0 for a cell whose values are all missing, so zeros
* are set back to missing.
replace POP_web=. if POP_web==0

sort nuts year
save POP.dta, replace

********************* Economically active population (LF) **********************

* Labor force, source 1: both sexes, ages 15 and over, saved as LF1 in LF_1.dta.
insheet using lfst_r_lfp2act_1_Data.csv, clear

* Recode ":" entries to "." and convert to numeric, stripping commas.
replace value = "." if value == ":"
destring value, generate(LF1) ignore(",") force

rename geo_label region_name
rename geo       nuts
rename time      year

* Discard every row that is not the 15+ / both-sexes aggregate.
drop if age != "15 years or over" | sex != "Total"

* Zeros are treated as missing.
replace LF1 = . if LF1 == 0

keep year nuts region_name LF1
save LF_1.dta, replace

* Labor force, source 2: summed within region-year, saved as LF2 in LF_2.dta.
insheet using reg_lfh2act_1_Data.csv, clear

* Recode ":" entries to "." and convert to numeric, stripping blanks.
replace value = "." if value == ":"
destring value, generate(LF2) ignore(" ") force

rename geo_label region_name
rename geo       nuts
rename time      year

* NOTE(review): all rows of a region-year are added up without any prior
* filter; confirm the extract contains no overlapping categories.
collapse (sum) LF2 (first) region_name, by(nuts year)

* A sum of 0 (e.g. a cell with only missing values) is treated as missing.
replace LF2 = . if LF2 == 0

save LF_2.dta, replace

* Labor force, source 3: ages 15 and over, saved as LF3 in LF_3.dta.
insheet using lfst_r_lfp3pop_1_Data.csv, clear

* Recode ":" entries to "." and convert to numeric, stripping blanks.
replace value = "." if value == ":"
destring value, generate(LF3) ignore(" ") force

rename geo  nuts
rename time year

* NOTE(review): only age is filtered here, not sex; the later 1:1 merge on
* nuts year presumably relies on the extract holding a single sex category.
drop if age != "15 years or over"

* geo_label is left under its original name in this file.
drop value flagandfootnotes sex age

sort nuts year
save LF_3.dta, replace

******************************* Employment (EMP) *******************************

* Employment, source 1: summed within region-year, saved as EMP1 in EMP_1.dta.
insheet using reg_lfh2emp_1_Data.csv, clear

* Recode ":" entries to "." and convert to numeric, stripping blanks.
replace value="." if value==":"
destring value, generate(EMP) ignore(" ") force

rename time year
rename geo nuts
* Use the same label name (region_name) as every other intermediate file.
* EMP_1 is the master of the merge in section 2; with a differently named
* label, rows present only in this file ended up with an empty region_name.
rename geo_label region_name

collapse (sum) EMP (first) region_name, by(nuts year)

* A sum of 0 (e.g. a cell with only missing values) is treated as missing.
replace EMP=. if EMP==0

rename EMP EMP1

save EMP_1.dta, replace

* Employment, source 2: both sexes, ages 15 and over, saved as EMP2 in EMP_2.dta.
insheet using lfst_r_lfe2emp_1_Data.csv, clear

* Recode ":" entries to "." and convert to numeric, stripping commas.
replace value = "." if value == ":"
destring value, generate(EMP2) ignore(",") force

rename geo_label region_name
rename geo       nuts
rename time      year

* Discard every row that is not the 15+ / both-sexes aggregate.
drop if age != "15 years or over" | sex != "Total"

keep year nuts region_name EMP2

* Zeros are treated as missing.
replace EMP2 = . if EMP2 == 0

save EMP_2.dta, replace

*-------------------------------------------------------------------------------
* 2. Merge the variables and recode NUTS codes
*-------------------------------------------------------------------------------
* Start from EMP_1 and attach each remaining intermediate file on (nuts, year).
* All observations are kept and no _merge indicator is stored.
use EMP_1.dta, clear
local sources EMP_2 LF_1 LF_2 LF_3 POP
foreach src of local sources {
	merge 1:1 nuts year using `src'.dta, nogenerate
}

* Recode region identifiers. No replacement target is itself recoded by
* another statement, so the statements do not depend on their order.

* Germany
replace nuts = "DE403&DE405&DE409&DE40A&DE40C&DE40D&DE40F&DE40I" if nuts == "DE41"
replace nuts = "DE401&DE402&DE404&DE406&DE407&DE408&DE40B&DE40E&DE40G&DE40H" if nuts == "DE42"
replace nuts = "DED4" if nuts == "DED1"
replace nuts = "DED5" if nuts == "DED3"
replace nuts = "DEF&DE6&DE5&DE9&DEA&DEB&DEC&DE1&DE2&DE3&DE7" if nuts == "DE" & year < 1991

* Italy: ITD -> ITH (plus suffixes 1-5), ITE -> ITI (plus suffixes 1-4)
replace nuts = "ITH" if nuts == "ITD"
forvalues d = 1/5 {
	replace nuts = "ITH`d'" if nuts == "ITD`d'"
}
replace nuts = "ITI" if nuts == "ITE"
forvalues d = 1/4 {
	replace nuts = "ITI`d'" if nuts == "ITE`d'"
}

* Finland
replace nuts = "FI1D1&FI1D2&FI1D3&FI1D4" if nuts == "FI13"
replace nuts = "FI1B&FI1C"               if nuts == "FI18"
replace nuts = "FI1D5&FI1D6&FI1D7"       if nuts == "FI1A"

* United Kingdom
replace nuts = "UKD6"  if nuts == "UKD2"
replace nuts = "UKD7"  if nuts == "UKD5"
replace nuts = "UKF25" if nuts == "UKF23"
replace nuts = "UKE44" if nuts == "UKE43"

********************************************************************************
* The DE30 figures for 1999-2000 look wrong in three of the source variables,
* so those values are set to missing.
foreach v of varlist EMP2 LF1 LF3 {
	replace `v' = . if nuts == "DE30" & inrange(year, 1999, 2000)
}
********************************************************************************
*-------------------------------------------------------------------------------
* 3. Take median over various sources
*-------------------------------------------------------------------------------
* Keep 1996 onward, then average rows that share a (nuts, year) pair so that
* each pair appears once.
drop if year<1996
* POP_web and UNEMP_web are written out in full; the abbreviated forms only
* resolve while Stata's variable abbreviation is switched on.
collapse (mean) EMP* LF* POP_web (first) region_name, by(nuts year)

* Combine the sources with a row-wise median.
egen LF_web	=rowmedian(LF1 LF2 LF3)
egen EMP_web	=rowmedian(EMP1 EMP2)

* Unemployment is the labor force minus employment; negative results are
* set to missing.
gen  UNEMP_web	=LF_web - EMP_web
	replace UNEMP_web = . if UNEMP_web<0

keep year nuts region_name LF_web EMP_web UNEMP_web POP_web

*-------------------------------------------------------------------------------
* 4. Save the data
*-------------------------------------------------------------------------------
sort nuts year

label variable LF_web    "Labor force, Eurostat web"
label variable EMP_web   "Employed, Eurostat web"
label variable UNEMP_web "Unemployed, Eurostat web"

* Keep only regions whose code starts with one of twelve country prefixes.
* The list is split in two because inlist() accepts a limited number of
* string arguments.
keep if inlist(substr(nuts, 1, 2), "UK", "SE", "NL", "IT", "FR", "ES") | ///
	inlist(substr(nuts, 1, 2), "DK", "DE", "CH", "BE", "AT", "FI")

save "$dta_files/Eurostat_web_1996_2011.dta", replace

* Delete the intermediate files written in section 1.
foreach tmp in EMP_1 EMP_2 LF_1 LF_2 LF_3 POP {
	erase `tmp'.dta
}

